%
% FILE NAME:    miscModelCurationScript_20180921.m
% 
% PURPOSE: Script for performing a number of different curations, updates,
%          and corrections to HumanGEM. The changes are detailed below:
%
%          1) Correct the size of some non-standard model fields
%             - Some of the non-standard model fields (e.g., "rxnKEGGID") 
%               are not the correct size, and thus may cause confusion and
%               loss of alignment with the list of reactions (model.rxns)
%               to which they correspond. Therefore, they will be padded
%               with blank entries, just to make them the proper
%               dimensions. These will be updated properly elsewhere.
%
%          2) Update of EC number assignment
%             - The EC number associated with HMR_4365 will be updated.
%
%          3) The grRules for several reactions will be updated:
%             - Complex I (HMR_6921)
%             - Complex III (HMR_6918)
%             - Lactate dehydrogenase rxns (HMR_4281, HMR_4388, and HMR_4280)
%             - Phosphofructokinase (HMR_4379)
%             - Succinyl CoA synthetase (HMR_4147)
%             - ATP synthase (HMR_6916)
%
%          4) Update direction of citrate-malate antiporter
%             - The citrate-malate antiport rxn (HMR_4964) is currently
%               implemeneted in the wrong direction. It should transport
%               citrate from mitochondria to cytoplasm, and malate in the
%               opposite direction. The rxn will therefore be reversed.
%
%          5) Removal of HMG-CoA transporter
%             - The model allows HMR-CoA (3-hydroxy-3-methylglutaryl)
%               transport across the mitochondrial membrane (HMR_1572), 
%               which should not be possible. There is also a rxn imported
%               from Recon3D (HMGCOAtx) that transports the same compound
%               across the peroxisomal membrane. Both of these reactions 
%               will therefore be deleted from the model.
%
%          6) Update protein-related fields
%             - Since some changes were made to grRules, the model fields
%               related to proteins ("proteins", "prRules", "rxnProtMat")
%               will be re-generated by running the translateGrRules
%               function.
%


%% load humanGEM (if not already loaded)
if ~exist('ihuman','var')
    load('model/Human-GEM.mat');
end



%% Correct the size of some non-standard model fields
% Some of the non-standard model fields (e.g., "rxnKEGGID") are not the
% correct size, and thus may cause confusion and loss of alignment with the
% list of reactions (model.rxns) to which they correspond. Therefore, they
% will be padded with blank entries, just to make them the proper
% dimensions. These will be updated properly elsewhere.
f = {'rxnKEGGID';'rxnEHMNID';'rxnBiGGID';'rxnHepatoNET1ID';'rxnREACTOMEID'};
for i = 1:length(f)
    if length(ihuman.(f{i})) < length(ihuman.rxns)
        ihuman.(f{i})(end+1:length(ihuman.rxns)) = {''};
    end
end



%% Update of EC number assignment

% The following reaction needs to have its EC number updated:
%  HMR_4365: 2-phospho-D-glycerate[c] <=> 3-phospho-D-glycerate[c]
%   EC orig: EC:5.4.2.1;EC:5.4.2.4
%   EC new:  EC:5.4.2.11
if isequal(ihuman.eccodes(ismember(ihuman.rxns,'HMR_4365')),{'EC:5.4.2.1;EC:5.4.2.4'})
    ihuman.eccodes(ismember(ihuman.rxns,'HMR_4365')) = {'EC:5.4.2.11'};
end



%% Update of grRules

% The reaction for Complex I (HMR_6921) should include ENSG00000283447
% (NDUFS1) as part of its grRule. This gene will therefore be added to the
% end of the grRule, as "... or ENSG00000283447".
[~,rxn_ind] = ismember('HMR_6921',ihuman.rxns);
if ~contains(ihuman.grRules(rxn_ind),'ENSG00000283447')
    if contains(ihuman.grRules{rxn_ind},' and ')
        error('grRule contains AND expressions. The gene should be added manually to the rule.');
    end
    
    % add gene to end of the rxn grRule
    ihuman.grRules(rxn_ind) = strcat(ihuman.grRules(rxn_ind),' or ENSG00000283447');
    
    % find gene index; if it doesn't exist, add it to the model
    [~,gene_ind] = ismember('ENSG00000283447',ihuman.genes);
    if gene_ind == 0
        ihuman.genes(end + 1) = {'ENSG00000283447'};
        gene_ind = length(ihuman.genes);
    end
    
    % add gene to rxnGeneMat
    ihuman.rxnGeneMat(rxn_ind,gene_ind) = 1;
end


% The reaction for Complex III (HMR_6918) should include ENSG00000284493
% (UQCRC2) as part of its grRule. This gene will therefore be added to the
% end of the grRule, as "... or ENSG00000284493".
[~,rxn_ind] = ismember('HMR_6918',ihuman.rxns);
if ~contains(ihuman.grRules(rxn_ind),'ENSG00000284493')
    if contains(ihuman.grRules{rxn_ind},' and ')
        error('grRule contains AND expressions. The gene should be added manually to the rule.');
    end
    
    % add gene to end of the rxn grRule
    ihuman.grRules(rxn_ind) = strcat(ihuman.grRules(rxn_ind),' or ENSG00000284493');
    
    % find gene index; if it doesn't exist, add it to the model
    [~,gene_ind] = ismember('ENSG00000284493',ihuman.genes);
    if gene_ind == 0
        ihuman.genes(end + 1) = {'ENSG00000284493'};
        gene_ind = length(ihuman.genes);
    end
    
    % add gene to rxnGeneMat
    ihuman.rxnGeneMat(rxn_ind,gene_ind) = 1;
end


% The lactate dehydrogenase rxns (HMR_4281, HMR_4388, and HMR_4280) have
% differing grRules, but they should be the same. Their rules will
% therefore be merged by combining all genes that are present in any of the
% three reactions' grRules into a single grRule, separated by "or".
%  HMR_4281: H+[p] + NADH[p] + pyruvate[p] <=> L-lactate[p] + NAD+[p]
%  HMR_4388: H+[c] + NADH[c] + pyruvate[c] <=> L-lactate[c] + NAD+[c]
%  HMR_4280: H+[m] + NADH[m] + pyruvate[m] <=> L-lactate[m] + NAD+[m]
[~,rxn_ind] = ismember({'HMR_4281';'HMR_4388';'HMR_4280'},ihuman.rxns);
if length(unique(ihuman.grRules(rxn_ind))) > 1  % check that the grRules actually differ
    
    if any(contains(ihuman.grRules(rxn_ind),' and '))
        error('grRule(s) contains AND expressions, and should be updated manually.');
    end
    
    % combine rxnGeneMat rows and grRules for the rxns
    ihuman.rxnGeneMat(rxn_ind,:) = repmat(max(ihuman.rxnGeneMat(rxn_ind,:),[],1),length(rxn_ind),1);
    gene_inds = find(ihuman.rxnGeneMat(rxn_ind(1),:) == 1);
    ihuman.grRules(rxn_ind) = join(ihuman.genes(gene_inds),' or ');

end


% The reaction HMR_4379 is associated with ENSG00000160226, which encodes
% a protein, but the function is related to DNA damage/repair, and not the
% reaction with which it is currently associated:
%  HMR_4379: ATP[c] + fructose-6-phosphate[c] => ADP[c] + fructose-1,6-bisphosphate[c]
% Therefore, the gene should be removed from the associated grRule.
[~,rxn_ind] = ismember('HMR_4379',ihuman.rxns);
if contains(ihuman.grRules(rxn_ind),'ENSG00000160226')  % check to see if this change has already been implemented
    if contains(ihuman.grRules(rxn_ind),' and ')
        error('grRule(s) contains AND expressions, and should be updated manually.');
    end
    [~,gene_ind] = ismember('ENSG00000160226',ihuman.genes);
    ihuman.rxnGeneMat(rxn_ind,gene_ind) = 0;
    ihuman.grRules(rxn_ind) = join(ihuman.genes(ihuman.rxnGeneMat(rxn_ind,:) == 1),' or ');
end


% The reaction HMR_4147 is associated with ENSG00000107104, which encodes
% a protein, but the function does not appear to be relevant to that rxn:
%  HMR_4147: CoA[m] + GTP[m] + succinate[m] <=> GDP[m] + Pi[m] + succinyl-CoA[m]
% Therefore, the gene should be removed from the associated grRule.
[~,rxn_ind] = ismember('HMR_4147',ihuman.rxns);
if contains(ihuman.grRules(rxn_ind),'ENSG00000107104')  % check to see if this change has already been implemented
    if contains(ihuman.grRules(rxn_ind),' and ')
        error('grRule(s) contains AND expressions, and should be updated manually.');
    end
    [~,gene_ind] = ismember('ENSG00000107104',ihuman.genes);
    ihuman.rxnGeneMat(rxn_ind,gene_ind) = 0;
    ihuman.grRules(rxn_ind) = join(ihuman.genes(ihuman.rxnGeneMat(rxn_ind,:) == 1),' or ');
end


% ATP synthase (HMR_6916) is incorrectly associated with many genes, such 
% as V-ATPases. The associated grRule therefore needs to be updated such
% that it contains only genes associated with ATP synthase.
[~,rxn_ind] = ismember('HMR_6916',ihuman.rxns);
ATPgenes = {'ENSG00000099624';'ENSG00000110955';'ENSG00000116459';'ENSG00000124172';...
            'ENSG00000125375';'ENSG00000135390';'ENSG00000152234';'ENSG00000154518';...
            'ENSG00000154723';'ENSG00000159199';'ENSG00000165629';'ENSG00000167283';...
            'ENSG00000167863';'ENSG00000169020';'ENSG00000198899';'ENSG00000241468';...
            'ENSG00000241837';'ENSG00000123472';'ENSG00000171953';'ENSG00000249222';...
            'ENSG00000228253';'ENSG00000156411';'ENSG00000173915'};
        
% check to see if this change has already been implemented
if ~all(cellfun(@(g) contains(ihuman.grRules(rxn_ind),g), ATPgenes)) || (sum(ihuman.rxnGeneMat(rxn_ind,:)) ~= 23)
    
    % ensure the rule doesn't contain ANDs
    if contains(ihuman.grRules(rxn_ind),' and ')
        error('grRule(s) contains AND expressions, and should be updated manually.');
    end
    
    % get the indices of each gene
    [~,gene_ind] = ismember(ATPgenes,ihuman.genes);
    if any(gene_ind == 0)
        % add any new genes to the model
        ihuman.genes = [ihuman.genes; ATPgenes(gene_ind == 0)];
        [~,gene_ind] = ismember(ATPgenes,ihuman.genes);
        ihuman.rxnGeneMat(1,length(ihuman.genes)) = 0;  % add new columns to rxnGeneMat
    end
    
    % update the rxnGeneMat row corresponding to the rxn
    ihuman.rxnGeneMat(rxn_ind,:) = 0;
    ihuman.rxnGeneMat(rxn_ind,gene_ind) = 1;
    
    % update the grRule (all ORs/isozymes - should be curated to incorporate enzyme complexes later)
    ihuman.grRules(rxn_ind) = join(ihuman.genes(ihuman.rxnGeneMat(rxn_ind,:) == 1),' or ');
    
end



%% Update direction of citrate-malate antiporter
% The citrate-malate antiport reaction is currently backwards:
%  HMR_4964: citrate[c] + H+[c] + malate[m] => citrate[m] + H+[m] + malate[c]
% It should be transporting citrate from M to C, and malate from C to M.
% Therefore, the reaction will be reversed.
[~,rxn_ind] = ismember('HMR_4964',ihuman.rxns);

% first verify that this change has not yet been implemented, by determining
% if the stoichiometric coefficient for H+[m] in this reaction is positive
[~,mCompInd] = ismember('mitochondria',lower(ihuman.compNames));
met_ind = find(ismember(ihuman.metNames,'H+') & (ihuman.metComps == mCompInd));
if ihuman.S(met_ind,rxn_ind) > 0
    ihuman.S(:,rxn_ind) = -ihuman.S(:,rxn_ind);
end



%% Removal of HMG-CoA transporter
% The model allows transport of HMR-CoA (3-hydroxy-3-methylglutaryl) across
% the mitochondrial membrane, which should not be possible:
%   HMR_1572: HMG-CoA[c] <=> HMG-CoA[m]
% In addition, there is another reaction that transports the same compound
% across the peroxisomal membrane:
%   HMGCOAtx: HMG-CoA[c] <=> HMG-CoA[p]
% Both of these reactions will therefore be deleted from the model.

% first check if they have already been deleted
if any(ismember({'HMR_1572';'HMGCOAtx'},ihuman.rxns))  
    % Delete rxns using a modified version of "removeReactions", which will
    % automatically detect reaction-related fields and delete the
    % appropriate entries. This function is NOT run with the options to
    % remove unused mets, genes, and comps, so those should be evaluated
    % in a later stage.
    ihuman = removeReactionsFull(ihuman,{'HMR_1572';'HMGCOAtx'});
end



%% Update protein-related fields
% Since some of the grRules have changed, the associated protein fields
% need to be updated.

[ihuman.prRules,ihuman.proteins,ihuman.rxnProtMat] = translateGrRules(ihuman.grRules,'UniProt');



%% Save updated model file
save('humanGEM.mat','ihuman');

% remove intermediate variables
clear('ATPgenes','dim','f','gene_ind','gene_inds','i','mCompInd','met_ind','rxn_ind');




